{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "from sklearn.metrics import classification_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data: vocabulary cap passed to imdb.load_data, fixed review length after\n",
    "# padding, and the number of output logits.\n",
    "VOCAB_SIZE = 5000\n",
    "MAX_LEN = 400\n",
    "N_CLASS = 2\n",
    "\n",
    "# Model: embedding width, and the number of filters per convolution branch\n",
    "# (also reused as the width of the hidden dense layer).\n",
    "EMBED_DIM = 50\n",
    "FILTERS = 250\n",
    "\n",
    "# Training: mini-batch size, number of epochs, and logging interval in steps.\n",
    "BATCH_SIZE = 32\n",
    "N_EPOCH = 2\n",
    "DISPLAY_STEP = 50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the IMDB reviews as integer sequences, capped to VOCAB_SIZE words.\n",
    "(X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data(\n",
    "    num_words=VOCAB_SIZE)\n",
    "\n",
    "# Bring every review to a fixed length of MAX_LEN so batches are rectangular.\n",
    "X_train, X_test = [\n",
    "    tf.keras.preprocessing.sequence.pad_sequences(seqs, maxlen=MAX_LEN)\n",
    "    for seqs in (X_train, X_test)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pipeline_train(X, y, sess):\n",
    "    \"\"\"Build and initialize the shuffled, batched training input pipeline.\n",
    "\n",
    "    The arrays are supplied through placeholders when the iterator is\n",
    "    initialized instead of being embedded in the graph as constants.\n",
    "\n",
    "    Args:\n",
    "        X: array of token ids with shape (n_samples, MAX_LEN).\n",
    "        y: array of integer labels with shape (n_samples,).\n",
    "        sess: tf.Session used to run the iterator initializer.\n",
    "\n",
    "    Returns:\n",
    "        (iterator, init_dict): the initializable iterator and the feed dict\n",
    "        that must be passed again whenever `iterator.initializer` is re-run.\n",
    "    \"\"\"\n",
    "    X_ph = tf.placeholder(tf.int32, [None, MAX_LEN])\n",
    "    y_ph = tf.placeholder(tf.int64, [None])\n",
    "    # Slice the placeholders, not the NumPy arrays: slicing the arrays directly\n",
    "    # would copy the whole dataset into the graph and leave the placeholders\n",
    "    # (and the feed dict below) unused.\n",
    "    dataset = tf.data.Dataset.from_tensor_slices((X_ph, y_ph))\n",
    "    # A shuffle buffer as large as the dataset gives a full shuffle.\n",
    "    dataset = dataset.shuffle(len(X)).batch(BATCH_SIZE)\n",
    "    iterator = dataset.make_initializable_iterator()\n",
    "    init_dict = {X_ph: X, y_ph: y}\n",
    "    sess.run(iterator.initializer, init_dict)\n",
    "    return iterator, init_dict\n",
    "\n",
    "def pipeline_test(X, sess):\n",
    "    \"\"\"Build and initialize the batched (unshuffled) evaluation input pipeline.\n",
    "\n",
    "    The array is supplied through a placeholder when the iterator is\n",
    "    initialized instead of being embedded in the graph as a constant.\n",
    "\n",
    "    Args:\n",
    "        X: array of token ids with shape (n_samples, MAX_LEN).\n",
    "        sess: tf.Session used to run the iterator initializer.\n",
    "\n",
    "    Returns:\n",
    "        (iterator, init_dict): the initializable iterator and the feed dict\n",
    "        that must be passed again whenever `iterator.initializer` is re-run.\n",
    "    \"\"\"\n",
    "    X_ph = tf.placeholder(tf.int32, [None, MAX_LEN])\n",
    "    # Slice the placeholder, not the NumPy array, so the feed dict is what\n",
    "    # actually delivers the data. No shuffle: predictions must stay aligned\n",
    "    # with the label order used by the caller.\n",
    "    dataset = tf.data.Dataset.from_tensor_slices(X_ph)\n",
    "    dataset = dataset.batch(BATCH_SIZE)\n",
    "    iterator = dataset.make_initializable_iterator()\n",
    "    init_dict = {X_ph: X}\n",
    "    sess.run(iterator.initializer, init_dict)\n",
    "    return iterator, init_dict\n",
    "\n",
    "\n",
    "# One shared session: both pipeline helpers run their iterator initializer in it.\n",
    "sess = tf.Session()\n",
    "iter_train, init_dict_train = pipeline_train(X=X_train, y=y_train, sess=sess)\n",
    "iter_test, init_dict_test = pipeline_test(X=X_test, sess=sess)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def forward(x, reuse, is_training):\n",
    "    \"\"\"Text-CNN forward pass: embed, dropout, parallel conv + max-pool, dense, logits.\n",
    "\n",
    "    Args:\n",
    "        x: integer token-id tensor. The time dimension must be statically\n",
    "            known, because the pooling window is read from the static shape.\n",
    "        reuse: forwarded to tf.variable_scope('model'); pass True to share\n",
    "            the variables created by an earlier call.\n",
    "        is_training: when True the dropout layer is active.\n",
    "\n",
    "    Returns:\n",
    "        Logits tensor whose last dimension is N_CLASS.\n",
    "    \"\"\"\n",
    "    with tf.variable_scope('model', reuse=reuse):\n",
    "        embedded = tf.contrib.layers.embed_sequence(x, VOCAB_SIZE, EMBED_DIM)\n",
    "        embedded = tf.layers.dropout(embedded, 0.2, training=is_training)\n",
    "        batch_size = tf.shape(embedded)[0]\n",
    "\n",
    "        pooled = []\n",
    "        for kernel_size in (3, 4, 5):\n",
    "            conv = tf.layers.conv1d(\n",
    "                embedded, FILTERS, kernel_size, activation=tf.nn.relu)\n",
    "            # Pool over the full remaining time axis (max-over-time pooling).\n",
    "            n_steps = conv.get_shape().as_list()[1]\n",
    "            conv = tf.layers.max_pooling1d(conv, n_steps, 1)\n",
    "            # Drop the singleton time axis left by the pooling.\n",
    "            pooled.append(tf.reshape(conv, (batch_size, FILTERS)))\n",
    "\n",
    "        features = tf.concat(pooled, -1)\n",
    "        hidden = tf.layers.dense(features, FILTERS, tf.nn.relu)\n",
    "        logits = tf.layers.dense(hidden, N_CLASS)\n",
    "    return logits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /usr/local/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:497: calling conv1d (from tensorflow.python.ops.nn_ops) with data_format=NHWC is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "`NHWC` for data_format is deprecated, use `NWC` instead\n"
     ]
    }
   ],
   "source": [
    "ops = {}\n",
    "\n",
    "# Symbolic next-batch tensors: every sess.run that depends on one of them\n",
    "# advances the corresponding iterator by one batch.\n",
    "X_train_batch, y_train_batch = iter_train.get_next()\n",
    "X_test_batch = iter_test.get_next()\n",
    "\n",
    "# Two towers over the same 'model' variables: dropout on for training,\n",
    "# off for prediction.\n",
    "logits_train_batch = forward(X_train_batch, reuse=False, is_training=True)\n",
    "ops['pred_logits'] = forward(X_test_batch, reuse=True, is_training=False)\n",
    "\n",
    "# lr = 5e-3 * 0.2 ** (global_step / 1400)\n",
    "ops['global_step'] = tf.Variable(0, trainable=False)\n",
    "ops['lr'] = tf.train.exponential_decay(\n",
    "    learning_rate=5e-3,\n",
    "    global_step=ops['global_step'],\n",
    "    decay_steps=1400,\n",
    "    decay_rate=0.2)\n",
    "\n",
    "ops['loss'] = tf.reduce_mean(\n",
    "    tf.nn.sparse_softmax_cross_entropy_with_logits(\n",
    "        labels=y_train_batch, logits=logits_train_batch))\n",
    "\n",
    "# minimize() also increments global_step once per update, driving the decay.\n",
    "ops['train'] = tf.train.AdamOptimizer(learning_rate=ops['lr']).minimize(\n",
    "    loss=ops['loss'], global_step=ops['global_step'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1 | Step 1 | Loss 0.724 | LR: 0.0050\n",
      "Epoch 1 | Step 50 | Loss 0.815 | LR: 0.0047\n",
      "Epoch 1 | Step 100 | Loss 0.576 | LR: 0.0045\n",
      "Epoch 1 | Step 150 | Loss 0.395 | LR: 0.0042\n",
      "Epoch 1 | Step 200 | Loss 0.225 | LR: 0.0040\n",
      "Epoch 1 | Step 250 | Loss 0.307 | LR: 0.0038\n",
      "Epoch 1 | Step 300 | Loss 0.223 | LR: 0.0035\n",
      "Epoch 1 | Step 350 | Loss 0.144 | LR: 0.0033\n",
      "Epoch 1 | Step 400 | Loss 0.181 | LR: 0.0032\n",
      "Epoch 1 | Step 450 | Loss 0.248 | LR: 0.0030\n",
      "Epoch 1 | Step 500 | Loss 0.279 | LR: 0.0028\n",
      "Epoch 1 | Step 550 | Loss 0.423 | LR: 0.0027\n",
      "Epoch 1 | Step 600 | Loss 0.282 | LR: 0.0025\n",
      "Epoch 1 | Step 650 | Loss 0.235 | LR: 0.0024\n",
      "Epoch 1 | Step 700 | Loss 0.315 | LR: 0.0022\n",
      "Epoch 1 | Step 750 | Loss 0.172 | LR: 0.0021\n",
      "\n",
      "Accuracy: 0.8850\n",
      "\n",
      "Epoch 2 | Step 800 | Loss 0.127 | LR: 0.0020\n",
      "Epoch 2 | Step 850 | Loss 0.419 | LR: 0.0019\n",
      "Epoch 2 | Step 900 | Loss 0.186 | LR: 0.0018\n",
      "Epoch 2 | Step 950 | Loss 0.162 | LR: 0.0017\n",
      "Epoch 2 | Step 1000 | Loss 0.093 | LR: 0.0016\n",
      "Epoch 2 | Step 1050 | Loss 0.080 | LR: 0.0015\n",
      "Epoch 2 | Step 1100 | Loss 0.112 | LR: 0.0014\n",
      "Epoch 2 | Step 1150 | Loss 0.301 | LR: 0.0013\n",
      "Epoch 2 | Step 1200 | Loss 0.142 | LR: 0.0013\n",
      "Epoch 2 | Step 1250 | Loss 0.549 | LR: 0.0012\n",
      "Epoch 2 | Step 1300 | Loss 0.162 | LR: 0.0011\n",
      "Epoch 2 | Step 1350 | Loss 0.334 | LR: 0.0011\n",
      "Epoch 2 | Step 1400 | Loss 0.176 | LR: 0.0010\n",
      "Epoch 2 | Step 1450 | Loss 0.115 | LR: 0.0009\n",
      "Epoch 2 | Step 1500 | Loss 0.100 | LR: 0.0009\n",
      "\n",
      "Accuracy: 0.8907\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sess.run(tf.global_variables_initializer())\n",
    "for epoch in range(1, N_EPOCH+1):\n",
    "    # (Re)initialize both iterators at the start of every epoch so this cell\n",
    "    # also works when re-run after the iterators have been exhausted.\n",
    "    sess.run(iter_train.initializer, init_dict_train)\n",
    "    sess.run(iter_test.initializer, init_dict_test)\n",
    "\n",
    "    # --- train for one pass over the training set ---\n",
    "    while True:\n",
    "        try:\n",
    "            # Fetch the loss in the same run() as the update. Every run() that\n",
    "            # touches the iterator pulls a fresh batch, so a separate\n",
    "            # sess.run(ops['loss']) would consume a batch without training on\n",
    "            # it and could raise OutOfRangeError outside this try block.\n",
    "            _, loss = sess.run([ops['train'], ops['loss']])\n",
    "        except tf.errors.OutOfRangeError:\n",
    "            break\n",
    "        # Neither global_step nor lr depends on the iterator, so reading them\n",
    "        # separately does not advance the dataset.\n",
    "        step, lr = sess.run([ops['global_step'], ops['lr']])\n",
    "        if step % DISPLAY_STEP == 0 or step == 1:\n",
    "            print(\"Epoch %d | Step %d | Loss %.3f | LR: %.4f\" % (epoch, step, loss, lr))\n",
    "\n",
    "    # --- evaluate on the full test set ---\n",
    "    y_pred_li = []\n",
    "    while True:\n",
    "        try:\n",
    "            y_pred_li.append(sess.run(ops['pred_logits']))\n",
    "        except tf.errors.OutOfRangeError:\n",
    "            break\n",
    "    y_pred = np.argmax(np.vstack(y_pred_li), 1)\n",
    "    print(\"\\nAccuracy: %.4f\\n\" % (y_pred==y_test).mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
